/*
Main event study estimates of the child penalty for entrepreneurs.
Produces results for Figure 1, Table 2, and Table A4.
*/

* clear memory, then raise the variable limit
clear
set maxvar 30000

* work from the project root and run the helper program files, in order
cd "$root"
foreach helper in stack_weight_panel trim_impute event_study {
    do "$root/code/admin/ado/`helper'.ado"
}

* directory that receives the estimate files written below
global figpath "$root/estimates/admin/regression"

** first, set up the unstacked data
use data/admin/intermediate/penalty_cohort_final.dta, clear

* indicator for non-missing wage earnings of at least 10000;
* left missing for years after 2018
gen any_wage = (wage_earnings != . & wage_earnings >= 10000) if year <= 2018

* raw outcome variables; kept below and passed to the stacking step
global outcomes_raw ///
    sold_firm_hazard ///
    operating_profits ///
    profit_before_self ///
    revenues ///
    costs_before_self ///
    owner_salaries ///
    n_emp_excl_owners ///
    lnr_lfirm_salary ///
    value_added ///
    wage_earnings ///
    any_wage

* restrict the data to the variables listed here
keep lnr lfirm year cohort_id year_first_child ///
    male age currently_own edlevel firm_age ///
    ind_cat* $outcomes_raw

* firm age with values above 20 set to 20
* NOTE(review): a missing firm_age compares as > 20 in Stata, so it is
* also set to 20 here -- confirm that this is intended
gen firm_age_match = cond(firm_age > 20, 20, firm_age)
    
* stack across cohorts and construct weights for the control firms
stack_weight_sample ind_cat2, agevar(firm_age_match) ///
    min_event(-5) max_event(10) ///
    first_cohort(2002) last_cohort(2018) ///
    outcomes($outcomes_raw)

* set negative revenues and owner salaries to zero
* (missing values are not < 0, so they stay missing)
foreach v in revenues owner_salaries {
    replace `v' = 0 if `v' < 0
}

* must own in -1, so the hazard is blanked out in that year
replace sold_firm_hazard = . if year == cohort_id - 1

* generate transformations of revenues and costs
* (the log of a zero or missing value is missing)
gen pos_rev = revenues > 0 & revenues != .
gen log_revenues = ln(revenues)
gen log_costs = ln(costs_before_self)

* trim variables and impute zeros
* every call writes to the _trim suffix and uses an upper centile of 99.5;
* consecutive variables that share the remaining settings are looped together

* years 1996-2018, lower centile 0.5
trim_impute profit_before_self, ///
    year_lb(1996) year_ub(2018) suffix(_trim) centile_lb(0.5) centile_ub(99.5)

* years 1996-2019, lower centile 0.5
foreach v in operating_profits value_added {
    trim_impute `v', ///
        year_lb(1996) year_ub(2019) suffix(_trim) centile_lb(0.5) centile_ub(99.5)
}

* years 1996-2018, lower centile 0
foreach v in owner_salaries n_emp_excl_owners {
    trim_impute `v', ///
        year_lb(1996) year_ub(2018) suffix(_trim) centile_lb(0) centile_ub(99.5)
}

* years 1995-2018, lower centile 0
foreach v in lnr_lfirm_salary wage_earnings {
    trim_impute `v', ///
        year_lb(1995) year_ub(2018) suffix(_trim) centile_lb(0) centile_ub(99.5)
}
    
* one-hot encode the covariate dummies (one indicator per observed value),
* suppressing the tabulation output
quietly tabulate firm_age_match, generate(dfirmage)
quietly tabulate ind_cat2, generate(dind)
quietly tabulate age, generate(dage)
quietly tabulate edlevel, generate(dedl)

* get the sample means in t-1 for the treated, separately by male,
* weighted by X_wt, and write them to pretreat_means.csv in $figpath
* (treated and X_wt are not created in this file; presumably they come
*  from stack_weight_sample -- confirm)
preserve
keep if treated == 1 & year == cohort_id - 1
collapse (mean) profit_before_self_trim ///
                owner_salaries_trim ///
                log_revenues ///
                log_costs ///
                n_emp_excl_owners_trim ///
                pos_rev ///
                operating_profits_trim ///
                value_added_trim ///
                wage_earnings_trim ///
                lnr_lfirm_salary_trim ///
                any_wage ///
                [aweight=X_wt], by(male)
* the path is quoted, like the other $root-based paths in this file, so
* that a root directory containing spaces does not break the export
export delimited using "${figpath}/pretreat_means.csv", replace
restore

* estimate for the outcomes where we want percentages
* one event study per outcome and per value of male (0, 1); each set of
* estimates is saved to <outcome>_ATT_male<g>.csv in $figpath
foreach Y of varlist sold_firm_hazard ///
                     pos_rev ///
                     profit_before_self_trim ///
                     owner_salaries_trim ///
                     operating_profits_trim ///
                     value_added_trim ///
                     lnr_lfirm_salary_trim ///
                     wage_earnings_trim ///
                     any_wage {

    forvalues g = 0/1 {
        * event window -5..10; the covariate dummies enter through x()
        aggregate_event_study `Y' if male == `g', ///
            min_event(-5) ///
            max_event(10) ///
            x(dfirmage* dind* dage* dedl*) ///
            store(temp) ///
            percentage ///
            save("${figpath}/`Y'_ATT_male`g'.csv")
    }

}

* estimate when we don't want percentages
* one event study per outcome and per value of male (0, 1); each set of
* estimates is saved to <outcome>_ATT_male<g>.csv in $figpath
foreach Y of varlist log_revenues ///
                     log_costs ///
                     n_emp_excl_owners_trim {

    forvalues g = 0/1 {
        * same specification as the percentage outcomes, without the
        * percentage option
        aggregate_event_study `Y' if male == `g', ///
            min_event(-5) ///
            max_event(10) ///
            x(dfirmage* dind* dage* dedl*) ///
            store(temp) ///
            save("${figpath}/`Y'_ATT_male`g'.csv")
    }

}
